import networkx as nx
import pandas as pd
import numpy as np


# Names of all employees
employees = {
    'Pablo',
    'Lee',
    'Georgia',
    'Vincent',
    'Andy',
    'Frida',
    'Joan',
    'Claude',
}

# Titles of all movies
movies = {
    'The Shawshank Redemption',
    'Forrest Gump',
    'The Matrix',
    'Anaconda',
    'The Social Network',
    'The Godfather',
    'Monty Python and the Holy Grail',
    'Snakes on a Plane',
    'Kung Fu Panda',
    'The Dark Knight',
    'Mean Girls',
}


# Helper for plotting graphs while working on the questions.
# Make sure to comment it out before submitting to the autograder.
def plot_graph(G, weight_name=None):
    '''
    Draw G on a new matplotlib figure using a spring layout.

    G: a networkx graph
    weight_name: name of the edge attribute used for edge labels and line
        widths (if G is weighted); when falsy the graph is drawn plain
    '''
    # %matplotlib notebook
    import matplotlib.pyplot as plt

    plt.figure()
    layout = nx.spring_layout(G)

    if not weight_name:
        nx.draw_networkx(G, layout)
        return

    # One integer line width per edge, in the order G.edges() yields them.
    line_widths = []
    for u, v in G.edges():
        line_widths.append(int(G[u][v][weight_name]))

    edge_labels = nx.get_edge_attributes(G, weight_name)
    nx.draw_networkx_edge_labels(G, layout, edge_labels=edge_labels)
    nx.draw_networkx(G, layout, width=line_widths)


              
                def answer_one():
    # YOUR CODE HERE
    g = nx.read_edgelist('assets/Employee_Movie_Choices.txt', delimiter='\t')
    return g
    raise NotImplementedError()


              
                assert type(answer_one()) == nx.Graph , "Your return type should be a Graph object"


              
                def answer_two():
    # YOUR CODE HERE
    g = answer_one()
    for node in g.nodes():
        if node in employees:
            g.add_node(node, type='employee')
        else:
            g.add_node(node, type='movie')
    return g
    raise NotImplementedError()


              
                assert type(answer_two()) == nx.Graph , "Your return type should be a Graph object"


              
                def answer_three():
    # YOUR CODE HERE
    from networkx.algorithms import bipartite
    b = answer_two()
    g = bipartite.weighted_projected_graph(b, employees)
    return g
    raise NotImplementedError()


              
                G = answer_three()
plot_graph(G, weight_name="weight")
G.edges(data = True)

EdgeDataView([('Vincent', 'Frida', {'weight': 2}), ('Vincent', 'Pablo', {'weight': 1}), ('Andy', 'Claude', {'weight': 1}), ('Andy', 'Lee', {'weight': 1}), ('Andy', 'Joan', {'weight': 1}), ('Andy', 'Frida', {'weight': 1}), ('Andy', 'Pablo', {'weight': 1}), ('Andy', 'Georgia', {'weight': 1}), ('Claude', 'Georgia', {'weight': 3}), ('Lee', 'Joan', {'weight': 3}), ('Frida', 'Pablo', {'weight': 2})])


              
                assert type(answer_three()) == nx.Graph , "Your return type should be a Graph object"


              
                def answer_four():
    # YOUR CODE HERE
    relationship = nx.read_edgelist('assets/Employee_Relationships.txt', data=[('relationship_score', int)])
    r_df = pd.DataFrame(relationship.edges(data=True), columns = ['From', 'To', 'relationship_score'])
    
    g = answer_three()
    g_df = pd.DataFrame(g.edges(data=True), columns=['From', 'To', 'movie_score'])
    
    g_df_2 = g_df.copy()
    
    g_df_2.rename(columns={"From":"From_", "To":"From"}, inplace=True)
    g_df_2.rename(columns={"From_":"To"}, inplace=True)
    
    g_df_3 = pd.concat([g_df, g_df_2])
    
    df = pd.merge(g_df_3, r_df, on = ['From', 'To'], how = 'right')
    df['movie_score'] = df['movie_score'].map(lambda x: x['weight'] if type(x)==dict else None)
    df['relationship_score'] = df['relationship_score'].map(lambda x: x['relationship_score'])
    df['movie_score'].fillna(value=0, inplace=True)
    
    return df['movie_score'].corr(df['relationship_score'])
    raise NotImplementedError()


              
                relationship = nx.read_edgelist('assets/Employee_Relationships.txt', data=[('relationship_score', int)])
r_df = pd.DataFrame(relationship.edges(data=True), columns = ['From', 'To', 'relationship_score'])

g = answer_three()
g_df = pd.DataFrame(g.edges(data=True), columns=['From', 'To', 'movie_score'])

g_df_2 = g_df.copy()

g_df_2.rename(columns={"From":"From_", "To":"From"}, inplace=True)
g_df_2.rename(columns={"From_":"To"}, inplace=True)

g_df_3 = pd.concat([g_df, g_df_2])

df = pd.merge(g_df_3, r_df, on = ['From', 'To'], how = 'right')
df['movie_score'] = df['movie_score'].map(lambda x: x['weight'] if type(x)==dict else None)
df['relationship_score'] = df['relationship_score'].map(lambda x: x['relationship_score'])
df['movie_score'].fillna(value=0, inplace=True)


              
                ans_four = answer_four()

# Assignment 1 - Creating and Manipulating Graphs

## Question 1

## Question 2

## Question 3

## Question 4